In [ ]:
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Deep Learning Design Patterns - Code Labs

Lab Exercise #8 - Get Familiar with Mobile Convolutional Networks

Prerequisites:

1. Familiar with Python
2. Completed Chapter 4: Mobile Convolutional Networks

Objectives:

1. Use a metaparameter to thin a MobileNet v1.
2. Code a mobile convolutional network style classifier.
3. Code a SqueezeNet fire block.
4. Quantize a mobile convolutional network.

Installs

For the last task, we will use OpenCV, so let's install it.


In [2]:
!pip install opencv-python


Collecting opencv-python
  Downloading opencv_python-4.2.0.34-cp37-cp37m-manylinux1_x86_64.whl (28.2 MB)
     |████████████████████████████████| 28.2 MB 4.9 MB/s eta 0:00:01
Requirement already satisfied: numpy>=1.14.5 in /opt/conda/lib/python3.7/site-packages (from opencv-python) (1.18.1)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.2.0.34

1. Use a Metaparameter to Thin a MobileNet v1

Let's start with a MobileNet v1 coded using the procedural reuse design pattern. A short illustration of the width-multiplier arithmetic follows the task list.

You will need to:

1. Set the thinning factor (width multiplier) at various locations in the code.
2. Set the max value for ReLU to clip values above the max value.
3. Calculate the number of thinned filters in the MobileNet blocks.
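
Before you fill in the placeholders, here is a minimal illustration (not the lab solution) of the width-multiplier idea: every filter count in the network is scaled by alpha, and because convolutional layers need an integer filter count, the scaled value is cast to an int.

In [ ]:
# Illustration only: how a width multiplier (alpha) thins the filter counts used in MobileNet v1
for alpha in (1.0, 0.75, 0.5, 0.25):
    thinned = [int(n * alpha) for n in (32, 64, 128, 256, 512, 1024)]
    print(alpha, thinned)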

In [ ]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import ZeroPadding2D, Conv2D, BatchNormalization, ReLU
from tensorflow.keras.layers import DepthwiseConv2D, GlobalAveragePooling2D, Reshape, Dropout


def stem(inputs, alpha):
    """ Construct the Stem Group
        inputs : input tensor
        alpha  : width multiplier
    """
    # Convolutional block
    # Replace the ?? by the thinning factor
    # HINT: reduce the number of filters (32) by the thinning factor
    x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs)
    x = Conv2D(32 * ??, (3, 3), strides=(2, 2), padding='valid')(x)
    x = BatchNormalization()(x)
    # Replace the max value to clip
    # HINT: the best value found by the authors
    x = ReLU(??)(x)

    # Depthwise Separable Convolution Block
    x = depthwise_block(x, 64, alpha, (1, 1))
    return x
 
# Replace the ?? with the parameter for thinning
# HINT: the same parameter name as in the stem()
def learner(x, ??):
    """ Construct the Learner
        x      : input to the learner
        alpha  : width multiplier
    """
    # First Depthwise Separable Convolution Group
    x = group(x, 128, 2, alpha)

    # Second Depthwise Separable Convolution Group
    x = group(x, 256, 2, alpha)

    # Third Depthwise Separable Convolution Group
    x = group(x, 512, 6, alpha)

    # Fourth Depthwise Separable Convolution Group
    x = group(x, 1024, 2, alpha)
    return x
    
def group(x, n_filters, n_blocks, alpha):
    """ Construct a Depthwise Separable Convolution Group
        x         : input to the group
        n_filters : number of filters
        n_blocks  : number of blocks in the group
        alpha     : width multiplier
    """ 
    # In first block, the depthwise convolution is strided - feature map size reduction
    # Replace the ?? with the thinning factor
    # HINT: the name of the parameter passed to this function
    x = depthwise_block(x, n_filters, ??, strides=(2, 2))
    
    # Remaining blocks
    for _ in range(n_blocks - 1):
        x = depthwise_block(x, n_filters, alpha, strides=(1, 1))
    return x

def depthwise_block(x, n_filters, alpha, strides):
    """ Construct a Depthwise Separable Convolution block
        x         : input to the block
        n_filters : number of filters
        alpha     : width multiplier
        strides   : strides
    """
    # Apply the width filter to the number of feature maps
    # Replace the ?? with the thinned calculation for the number of filters.
    # HINT: multiply the number of filters by the thinning factor. Remember this will be a real number and the convolution layers require
    #       an integer, so you will need to cast the result.
    filters = ??

    # When the block is strided, zero pad the feature maps for the strided depthwise convolution
    if strides == (2, 2):
        x = ZeroPadding2D(padding=((0, 1), (0, 1)))(x)
        padding = 'valid'
    else:
        padding = 'same'

    # Depthwise Convolution
    x = DepthwiseConv2D((3, 3), strides, padding=padding)(x)
    x = BatchNormalization()(x)
    x = ReLU(6.0)(x)

    # Pointwise Convolution
    x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization()(x)
    x = ReLU(6.0)(x)
    return x

def classifier(x, alpha, dropout, n_classes):
    """ Construct the classifier group
        x         : input to the classifier
        alpha     : width multiplier
        dropout   : dropout percentage
        n_classes : number of output classes
    """
    # Flatten the feature maps into 1D feature maps (?, N)
    x = GlobalAveragePooling2D()(x)

    # Reshape the feature maps to (?, 1, 1, 1024)
    shape = (1, 1, int(1024 * alpha))
    x = Reshape(shape)(x)
    # Perform dropout for preventing overfitting
    x = Dropout(dropout)(x)

    # Use convolution for classifying (emulates a fully connected layer)
    x = Conv2D(n_classes, (1, 1), padding='same', activation='softmax')(x)
    # Reshape the resulting output to 1D vector of number of classes
    x = Reshape((n_classes, ))(x)
    return x

# Meta-parameter: width multiplier (0 .. 1) for reducing number of filters.
# Replace the ?? with the thinning factor - let's start with no thinning
# HINT: one
alpha      = ??   

# Meta-parameter: dropout rate
dropout    = 0.5 

inputs = Input(shape=(224, 224, 3))

# The Stem Group
x = stem(inputs, alpha)    

# The Learner
x = learner(x, alpha)

# The classifier for 1000 classes
outputs = classifier(x, alpha, dropout, 1000)

# Instantiate the Model
model = Model(inputs, outputs)

Verify the model using the summary method

The end of the output should look like below.

_________________________________________________________________
re_lu_26 (ReLU)              (None, 7, 7, 1024)        0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
reshape (Reshape)            (None, 1, 1, 1024)        0         
_________________________________________________________________
dropout (Dropout)            (None, 1, 1, 1024)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 1, 1, 1000)        1025000   
_________________________________________________________________
reshape_1 (Reshape)          (None, 1000)              0         
=================================================================
Total params: 4,264,808
Trainable params: 4,242,920
Non-trainable params: 21,888

In [ ]:
model.summary()

Try thinning the MobileNet v1

Let's now thin the MobileNet v1 you built. Try thinning it by a factor of 0.25.

1. Modify the above code and change alpha = 1 to alpha = 0.25.
2. Do a summary() on the model and see how the number of parameters changed. A sketch for comparing parameter counts across several values of alpha follows this list.
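
Here is a minimal sketch for the comparison, assuming the ?? placeholders in stem(), learner(), and classifier() above have been filled in; the build_mobilenet() helper below is introduced just for this comparison.

In [ ]:
# Sketch only: rebuild the model for several width multipliers and compare parameter counts
def build_mobilenet(alpha, dropout=0.5, n_classes=1000):
    inputs = Input(shape=(224, 224, 3))
    x = stem(inputs, alpha)
    x = learner(x, alpha)
    outputs = classifier(x, alpha, dropout, n_classes)
    return Model(inputs, outputs)

for alpha in (1.0, 0.25):
    print("alpha =", alpha, "parameters =", build_mobilenet(alpha).count_params())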

2. Code a Mobile Convolutional Network Style Classifier

Let's now code the mobile convolutional network style for a classifier, where a convolutional layer is used in place of a dense layer.

1. Set the number of filters in Conv2D to the number of classes.
2. Set the layer for reducing and flattening the feature maps.
3. Set the final activation function for the class probability distribution.

In the summary, you should see 513K parameters.


In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, Activation

def classifier(x, n_classes):
    ''' Construct the Classifier 
        x        : input to the classifier
        n_classes: number of output classes
    '''
    # Replace the ?? with the number of filters
    # HINT: set the number of filters equal to the number of classes
    x = Conv2D(??, (1, 1), strides=1, activation='relu', padding='same')(x)

    # reduce each filter (class) to a single value and flatten to a 1D vector
    # Replace the ?? with the layer that does global average pooling and flattens into 1D vector
    # HINT: the name of the layer is in the import from tensorflow.keras.layers
    x = ??()(x)
    
    # Replace the ?? with the activation function (string name) used for multi-classification
    # HINT: it's the same as if we used a dense layer
    outputs = Activation(??)(x)
    return outputs

# let's pretend this is the final feature map size and number of maps before the classifier
final_feature_maps = Input((4, 4, 512))
model = Model(final_feature_maps, classifier(final_feature_maps, 1000))
model.summary()
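
For comparison, here is a hedged sketch of the conventional classifier that uses a dense layer after global average pooling. On the same 4 x 4 x 512 feature maps with 1000 classes it also has 512 * 1000 + 1000 = 513,000 parameters, so the convolutional classifier changes the layer type, not the size.

In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

# For comparison only: a conventional classifier with a dense layer after global average pooling.
# It has the same 513,000 parameters as the convolutional classifier above.
inputs = Input((4, 4, 512))
x = GlobalAveragePooling2D()(inputs)
outputs = Dense(1000, activation='softmax')(x)
Model(inputs, outputs).summary()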

3. Code a SqueezeNet Fire Block

Let's code a fire block. You will need to:

1. Set the number of filters for the squeeze layer.
2. Set the inputs to the expand layers.
3. Finish the concatenation of the feature maps from the two expand branches.

In [ ]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Concatenate

def fire_block(x, n_filters):
    ''' Construct a Fire Block
        x        : input to the block
        n_filters: number of filters
    '''
    # squeeze layer
    # Replace the ?? with a bottleneck filter size
    # HINT: A bottleneck is a 1x1 filter that learns how to reduce the number of feature maps
    squeeze = Conv2D(n_filters, (??, ??), strides=1, activation='relu', padding='same')(x)

    # branch the squeeze layer into a 1x1 and a 3x3 convolution, each with 4x the number of filters
    # Replace the ??s with the input from the squeeze layer
    # HINT: both convolutional layers have the same input
    expand1x1 = Conv2D(n_filters * 4, (1, 1), strides=1, activation='relu', padding='same')(??)
    expand3x3 = Conv2D(n_filters * 4, (3, 3), strides=1, activation='relu', padding='same')(??)

    # concatenate the feature maps from the 1x1 and 3x3 branches
    # Replace the ?? with the output from the 3x3 expand branch
    # HINT: it's the branch with the 3x3 filter
    x = Concatenate()([expand1x1, ??])
    return x

# The input shape
inputs = Input((224, 224, 3))

outputs = fire_block(inputs, 16)

# Instantiate the Model
model = Model(inputs, outputs)
model.summary()

Verify the block using the summary method

The output should look like below.

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_3 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 224, 224, 16) 64          input_3[0][0]                    
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 224, 224, 64) 1088        conv2d_3[0][0]                   
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 224, 224, 64) 9280        conv2d_3[0][0]                   
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 224, 224, 128 0           conv2d_4[0][0]                   
                                                                 conv2d_5[0][0]                   
==================================================================================================
Total params: 10,432
Trainable params: 10,432
Non-trainable params: 0

In [ ]:
model.summary()
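
As a quick sanity check on the summary, the parameter counts can be derived by hand: each Conv2D layer has (kernel height x kernel width x input channels + 1) x output channels parameters.

In [ ]:
# Sanity check on the parameter counts in the fire block summary above
squeeze   = (1 * 1 * 3  + 1) * 16   # 64     : 1x1 squeeze on the 3-channel input
expand1x1 = (1 * 1 * 16 + 1) * 64   # 1,088  : 1x1 expand branch
expand3x3 = (3 * 3 * 16 + 1) * 64   # 9,280  : 3x3 expand branch
print(squeeze, expand1x1, expand3x3, squeeze + expand1x1 + expand3x3)  # total 10,432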

4. Quantize a Mobile Convolutional Network

We will start by using a pre-built MobileNetV2 model from TF.Keras which is trained on ImageNet.

Next, we will use the model to do a prediction on the image of an apple. The integer label returned from the prediction will be 948 (that's an apple).


In [12]:
from tensorflow.keras.applications import MobileNetV2
import cv2
import numpy as np

# Let's use a prebuilt MobileNet model trained on ImageNet
model = MobileNetV2(input_shape=(224, 224, 3), weights='imagenet')

# Let's make a prediction with the unquantized (large) version of the model

# We will use the image of an apple and preprocess it for the model
image = cv2.imread('apple.png')
image = cv2.resize(image, (224, 224))
image = (image / 255.0).astype(np.float32)

# now make the prediction
probabilities = model.predict(np.asarray([image]))
prediction = np.argmax(probabilities)

# Okay, it predicts the label associated with the value 948 (apple)
print("prediction", prediction)


Downloading data from https://github.com/JonathanCMitchell/mobilenet_v2_keras/releases/download/v1.1/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5
14540800/14536120 [==============================] - 1s 0us/step
prediction 948

Quantize the Model

Let's now use TFLite to quantize the model and then do a prediction with the quantized model:


In [19]:
import tensorflow as tf

# Create an instance of the converter for TF.Keras (keras format) model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Convert the model to the TFLite format
# Replace the ?? with the method that converts the keras (large) model to a quantized TFLite model
# HINT: method is called convert.
tflite_model = converter.??()

# Instantiate an interpreter for the TFLite model
interpreter = tf.lite.Interpreter(model_content=tflite_model)
# Allocate the input and output tensors for the model
interpreter.allocate_tensors()

# Get input and output tensors details needed for prediction
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# pass the image as a batch to the input tensor
# Replace the ?? with the image of an apple as a batch of 1.
# HINT: look back at how the image was passed as a batch to the Keras model's predict()
interpreter.set_tensor(input_details[0]['index'], ??)

# Execute (invoke) the interpreter to perform the prediction
interpreter.invoke()

# Get the output from the model
softmax = interpreter.get_tensor(output_details[0]['index'])

# multi-class example, determine the label predicted from the softmax output
prediction = np.argmax(softmax)
print("prediction", prediction)


prediction 948
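
Note that from_keras_model() followed by convert() alone produces a float TFLite model. To actually quantize the weights you also set the converter's optimizations flag. Here is a minimal sketch (assuming the ?? placeholders above have been filled in) of post-training dynamic-range quantization with a size comparison:

In [ ]:
# Sketch of post-training (dynamic-range) quantization with TFLite.
# Setting the optimizations flag tells the converter to quantize the weights,
# which shrinks the serialized model to roughly a quarter of its float size.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

print("float TFLite model size    :", len(tflite_model))
print("quantized TFLite model size:", len(quantized_tflite_model))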

End of Lab Exercise